package cn.edu.zju.om;


import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.Assert;

import org.htmlparser.filters.RegexFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for the regular expressions that recognise {@code product.aspx}
 * links and pull the numeric {@code product_id} value out of them.
 *
 * <p>NOTE(review): the patterns are defined locally in this test class;
 * presumably they mirror the ones used by the Dangdang processor — confirm
 * they are kept in sync with the production code.
 */
public class DangdangProcessorTests {

	/** Matches any string that contains a {@code product.aspx?product_id=} link. */
	private static final Pattern PRODUCT_URL =
			Pattern.compile(".*product\\.aspx\\?product_id=.*");

	/** Same link shape, with the run of digits after {@code product_id=} captured as group 1. */
	private static final Pattern PRODUCT_ID =
			Pattern.compile(".*product\\.aspx\\?product_id=([0-9]+).*");

	/** No shared fixture is required; intentionally empty. */
	@Before
	public void setUp() throws Exception {
	}

	/** Nothing to release after a test; intentionally empty. */
	@After
	public void tearDown() throws Exception {
	}

	/**
	 * Verifies that the product-URL pattern accepts a product link and rejects
	 * strings that do not contain the literal {@code product.aspx?product_id=}.
	 */
	@Test
	public void testRegexURL() {
		String str = "product.aspx?product_id=20645696&ref=product-0-A";

		Matcher matcher = PRODUCT_URL.matcher(str);

		// Assert instead of printing, so a broken pattern actually fails the test.
		Assert.assertTrue("product link should match: " + str, matcher.matches());

		// The '.' and '?' are escaped in the pattern, so they must match literally.
		Assert.assertFalse("'.' must be literal",
				PRODUCT_URL.matcher("productXaspx?product_id=1").matches());
		Assert.assertFalse("'?' must be literal",
				PRODUCT_URL.matcher("product.aspx&product_id=1").matches());
	}

	/**
	 * Verifies that replacing the whole link with capture group 1 yields only
	 * the numeric product id.
	 */
	@Test
	public void extractURL() {
		String str = "product.aspx?product_id=9036936&ref=product-0-A";

		// The pattern spans the entire input, so "$1" leaves just the digits.
		String newStr = PRODUCT_ID.matcher(str).replaceAll("$1");

		Assert.assertEquals("extracted product id", "9036936", newStr);
	}
}
